In [1]:
#importing all the required libraries
import pandas as pd
import numpy as np
import os

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import style

np.random.seed(42)
from datetime import datetime, timedelta
%matplotlib inline
In [2]:
df  = pd.read_csv('Sample store final.csv', error_bad_lines=False, engine='python')
df
Out[2]:
Row_ID Order_ID Order_Date Ship_Date Ship_Mode Customer_ID Customer_Name Segment Country City ... Order_Time Month_of_Order Week_of_Year_for_Order Quarter_of_Year_for_Order Year_of_shipping_date Quarter_of_shipping_date Week_of_year_of_shipment Month_of_shipment Day_of_shipment Day_of_order
0 7981 CA-2014-103800 1/3/2014 0:00 1/7/2014 0:00 Standard Class DP-13000 Darren Powers Consumer United States Houston ... 12:00:00 AM January 1 1 2014 1 2 January Tuesday Friday
1 740 CA-2014-112326 1/4/2014 0:00 1/8/2014 0:00 Standard Class PO-19195 Phillina Ober Home Office United States Naperville ... 12:00:00 AM January 1 1 2014 1 2 January Wednesday Saturday
2 741 CA-2014-112326 1/4/2014 0:00 1/8/2014 0:00 Standard Class PO-19195 Phillina Ober Home Office United States Naperville ... 12:00:00 AM January 1 1 2014 1 2 January Wednesday Saturday
3 742 CA-2014-112326 1/4/2014 0:00 1/8/2014 0:00 Standard Class PO-19195 Phillina Ober Home Office United States Naperville ... 12:00:00 AM January 1 1 2014 1 2 January Wednesday Saturday
4 1760 CA-2014-141817 1/5/2014 0:00 1/12/2014 0:00 Standard Class MB-18085 Mick Brown Consumer United States Philadelphia ... 12:00:00 AM January 2 1 2014 1 3 January Sunday Sunday
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
9989 908 CA-2017-143259 12/30/2017 0:00 1/3/2018 0:00 Standard Class PO-18865 Patrick O'Donnell Consumer United States New York City ... 12:00:00 AM December 52 4 2018 1 1 January Wednesday Saturday
9990 909 CA-2017-143259 12/30/2017 0:00 1/3/2018 0:00 Standard Class PO-18865 Patrick O'Donnell Consumer United States New York City ... 12:00:00 AM December 52 4 2018 1 1 January Wednesday Saturday
9991 1297 CA-2017-115427 12/30/2017 0:00 1/3/2018 0:00 Standard Class EB-13975 Erica Bern Corporate United States Fairfield ... 12:00:00 AM December 52 4 2018 1 1 January Wednesday Saturday
9992 1298 CA-2017-115427 12/30/2017 0:00 1/3/2018 0:00 Standard Class EB-13975 Erica Bern Corporate United States Fairfield ... 12:00:00 AM December 52 4 2018 1 1 January Wednesday Saturday
9993 5092 CA-2017-156720 12/30/2017 0:00 1/3/2018 0:00 Standard Class JM-15580 Jill Matthias Consumer United States Loveland ... 12:00:00 AM December 52 4 2018 1 1 January Wednesday Saturday

9994 rows × 32 columns

In [3]:
df.columns = df.columns.to_series().apply(lambda x: x.strip())
In [4]:
df = df.copy()
In [5]:
df.head(5)
Out[5]:
Row_ID Order_ID Order_Date Ship_Date Ship_Mode Customer_ID Customer_Name Segment Country City ... Order_Time Month_of_Order Week_of_Year_for_Order Quarter_of_Year_for_Order Year_of_shipping_date Quarter_of_shipping_date Week_of_year_of_shipment Month_of_shipment Day_of_shipment Day_of_order
0 7981 CA-2014-103800 1/3/2014 0:00 1/7/2014 0:00 Standard Class DP-13000 Darren Powers Consumer United States Houston ... 12:00:00 AM January 1 1 2014 1 2 January Tuesday Friday
1 740 CA-2014-112326 1/4/2014 0:00 1/8/2014 0:00 Standard Class PO-19195 Phillina Ober Home Office United States Naperville ... 12:00:00 AM January 1 1 2014 1 2 January Wednesday Saturday
2 741 CA-2014-112326 1/4/2014 0:00 1/8/2014 0:00 Standard Class PO-19195 Phillina Ober Home Office United States Naperville ... 12:00:00 AM January 1 1 2014 1 2 January Wednesday Saturday
3 742 CA-2014-112326 1/4/2014 0:00 1/8/2014 0:00 Standard Class PO-19195 Phillina Ober Home Office United States Naperville ... 12:00:00 AM January 1 1 2014 1 2 January Wednesday Saturday
4 1760 CA-2014-141817 1/5/2014 0:00 1/12/2014 0:00 Standard Class MB-18085 Mick Brown Consumer United States Philadelphia ... 12:00:00 AM January 2 1 2014 1 3 January Sunday Sunday

5 rows × 32 columns

In [6]:
print(list(df.columns))
['Row_ID', 'Order_ID', 'Order_Date', 'Ship_Date', 'Ship_Mode', 'Customer_ID', 'Customer_Name', 'Segment', 'Country', 'City', 'State', 'Postal_Code', 'Region', 'Product_ID', 'CategoryName', 'Sub-Category', 'Product_Name', 'Sales', 'Quantity', 'Discount', 'Profit', 'Year_of_Order', 'Order_Time', 'Month_of_Order', 'Week_of_Year_for_Order', 'Quarter_of_Year_for_Order', 'Year_of_shipping_date', 'Quarter_of_shipping_date', 'Week_of_year_of_shipment', 'Month_of_shipment', 'Day_of_shipment', 'Day_of_order']
In [7]:
df.dtypes
Out[7]:
Row_ID                         int64
Order_ID                      object
Order_Date                    object
Ship_Date                     object
Ship_Mode                     object
Customer_ID                   object
Customer_Name                 object
Segment                       object
Country                       object
City                          object
State                         object
Postal_Code                    int64
Region                        object
Product_ID                    object
CategoryName                  object
Sub-Category                  object
Product_Name                  object
Sales                        float64
Quantity                       int64
Discount                     float64
Profit                       float64
Year_of_Order                  int64
Order_Time                    object
Month_of_Order                object
Week_of_Year_for_Order         int64
Quarter_of_Year_for_Order      int64
Year_of_shipping_date          int64
Quarter_of_shipping_date       int64
Week_of_year_of_shipment       int64
Month_of_shipment             object
Day_of_shipment               object
Day_of_order                  object
dtype: object
In [8]:
sales_per_purchase_month = df.groupby(['Month_of_Order', 'Day_of_order'], as_index=False).Sales.sum()
sales_per_purchase_month = sales_per_purchase_month.sort_values(by=['Month_of_Order'], ascending=True)

Let's Take a Peep at Our Weekly and Monthly Sales Trend Over All The Four Consecutive Years In Our Historical Database

In [9]:
import plotly.express as px

salesdata = sales_per_purchase_month
fig = px.line(df, x="Month_of_Order", y="Sales", color='Day_of_order', title='Sales Each Month & Each DayofWeek')

fig.update_layout(
    title="Sales Each Month & Each DayofWeek",
    xaxis_title="Months",
    yaxis_title="Sales(in $$)",
    font=dict(
        family="Courier New, monospace",
        size=15,
        color="#7f7f7f"
    )
)
fig.show()
In [215]:
# creating an aggregation
avg_sales_per_category = df.groupby('CategoryName', as_index=False).agg({'Sales': ['count', 'mean']})
avg_sales_per_category.columns = ['Product Category', 'Total Sales', 'Average Sales']


avg_sales_per_category = avg_sales_per_category[avg_sales_per_category['Total Sales'] > 100]
avg_sales_per_category = avg_sales_per_category.sort_values(by='Total Sales', ascending=False)
avg_sales_per_category
Out[215]:
Product Category Total Sales Average Sales
1 Office Supplies 6026 119.324101
0 Furniture 2121 349.834887
2 Technology 1847 452.709276

Let's View Our Top Categories by Average Sales Made Over a Span of 4 Consecutive Years

In [216]:
import plotly.express as px
avg_sales = avg_sales_per_category[:20]
fig = px.bar(avg_sales, x='Product Category', y='Total Sales',
             hover_data=['Average Sales'], color='Average Sales',
             height=500)
fig.show()
In [221]:
# creating an aggregation

avg_profit_per_category = df.groupby('CategoryName', as_index=False).agg({'Profit': ['count', 'mean']})
avg_profit_per_category.columns = ['Product Category', 'Total Profit', 'Average Profit']


avg_profit_per_category = avg_profit_per_category[avg_profit_per_category['Total Profit'] > 100]
avg_profit_per_category = avg_profit_per_category.sort_values(by='Total Profit', ascending=False)
avg_profit_per_category
Out[221]:
Product Category Total Profit Average Profit
1 Office Supplies 6026 20.327050
0 Furniture 2121 8.699327
2 Technology 1847 78.752002

Top Categories by Average Profits Generated

In [223]:
import plotly.express as px
avg_profit = avg_profit_per_category[:20]
fig = px.bar(avg_profit, x='Product Category', y='Average Profit',
             hover_data=['Total Profit'], color='Total Profit',
             height=500)
fig.show()
In [14]:
# Keeping track of profits made using our knowledge of time
purchaseday = df.copy()
# creating an aggregation 
profit_per_purchase_month = df.groupby(['Month_of_Order', 'Quarter_of_Year_for_Order', 'Day_of_order'], as_index=False).Profit.sum()
profit_per_purchase_month = profit_per_purchase_month.sort_values(by=['Month_of_Order'], ascending=True)
profit_per_purchase_month
Out[14]:
Month_of_Order Quarter_of_Year_for_Order Day_of_order Profit
0 April 2 Friday -273.1101
1 April 2 Monday -1027.7799
2 April 2 Saturday 7860.9157
3 April 2 Sunday -9.0884
4 April 2 Thursday 1795.4848
... ... ... ... ...
78 September 3 Monday 5294.0876
79 September 3 Saturday 3897.2571
80 September 3 Sunday 7321.1271
81 September 3 Thursday 4670.9605
83 September 3 Wednesday 2513.7373

84 rows × 4 columns

In [15]:
# creating a purchase day feature alongside aggregration using our knowledge of time
sales_per_month = df.groupby(['Month_of_Order', 'Day_of_order','Quarter_of_Year_for_Order'], as_index=False).Sales.sum()
sales_per__month = sales_per_purchase_month.sort_values(by=['Month_of_Order'], ascending=True)
sales_per_month
Out[15]:
Month_of_Order Day_of_order Quarter_of_Year_for_Order Sales
0 April Friday 2 22441.5335
1 April Monday 2 20433.2300
2 April Saturday 2 40912.0760
3 April Sunday 2 18378.0135
4 April Thursday 2 15289.8226
... ... ... ... ...
79 September Saturday 3 39635.9630
80 September Sunday 3 55147.9550
81 September Thursday 3 42917.0974
82 September Tuesday 3 36624.3308
83 September Wednesday 3 11553.2230

84 rows × 4 columns

In [16]:
# creating a purchase day feature
df = df.copy()
# creating an aggregation


sales_per_month = df.groupby(['Month_of_Order', 'Day_of_order'], as_index=False).Sales.mean()

sales_per_month
Out[16]:
Month_of_Order Day_of_order Sales
0 April Friday 182.451492
1 April Monday 206.396263
2 April Saturday 338.116331
3 April Sunday 141.369335
4 April Thursday 186.461251
... ... ... ...
79 September Saturday 167.240350
80 September Sunday 222.370786
81 September Thursday 212.460878
82 September Tuesday 256.114201
83 September Wednesday 275.076738

84 rows × 3 columns

In [17]:
sales_per_month = sales_per_month.sort_values(by=['Day_of_order'], ascending=True)
sales_per_month
Out[17]:
Month_of_Order Day_of_order Sales
0 April Friday 182.451492
28 January Friday 234.915717
14 December Friday 224.803234
77 September Friday 244.338868
35 July Friday 264.459748
... ... ... ...
20 December Wednesday 212.131138
13 August Wednesday 328.707143
6 April Wednesday 183.975846
69 November Wednesday 256.570101
83 September Wednesday 275.076738

84 rows × 3 columns

In [18]:
sales_per_category = df.groupby(['Month_of_Order', 'Product_Name'], as_index=False).Sales.sum()
sales_per_category = sales_per_category.sort_values(by=['Sales'], ascending=False)
sales_per_category.columns = ['Purchase Month','Product Category', 'Sales Revenue']
sales_per_category
Out[18]:
Purchase Month Product Category Sales Revenue
6178 October Canon imageCLASS 2200 Advanced Copier 28699.918
4019 March Cisco TelePresence System EX90 Videoconferenci... 22638.480
4004 March Canon imageCLASS 2200 Advanced Copier 13999.960
1471 December GBC Ibimaster 500 Manual ProClick Binding System 11490.798
7186 September Lexmark MX611dhe Monochrome Laser Printer 11219.934
... ... ... ...
6408 October Maxell 4.7GB DVD-R 5/Pack 0.990
3356 June Avery Triangle Shaped Sheet Lifters, Black, 2/... 0.984
6711 September Acco 3-Hole Punch 0.876
3343 June Avery Durable Slant Ring Binders With Label Ho... 0.836
3565 June Hoover Replacement Belt for Commercial Guardsm... 0.444

7629 rows × 3 columns

In [20]:
import plotly.express as px
In [21]:
sales_per_category = df.groupby(['Month_of_Order', 'CategoryName'], as_index=False).Sales.sum()
sales_per_category = sales_per_category.sort_values(by=['Sales'], ascending=False)
sales_per_category.columns = ['Purchase_Month','CategoryName', 'Sales_Revenue']
sales_per_category
Out[21]:
Purchase_Month CategoryName Sales_Revenue
29 November Technology 131134.8410
6 December Furniture 121817.9675
27 November Furniture 121286.0490
33 September Furniture 106380.5907
7 December Office Supplies 102642.6440
34 September Office Supplies 101888.6460
8 December Technology 100832.8920
28 November Office Supplies 100040.1810
35 September Technology 99380.7090
23 March Technology 97851.5700
32 October Technology 87031.9450
26 May Technology 63642.1550
4 August Office Supplies 62134.1620
30 October Furniture 58087.5157
22 March Office Supplies 56385.6430
31 October Office Supplies 55203.5240
17 July Technology 54854.0260
18 June Furniture 52999.4633
5 August Technology 52025.8560
19 June Office Supplies 51414.6330
21 March Furniture 50768.2758
1 April Office Supplies 49433.2700
15 July Furniture 49377.0120
24 May Furniture 48365.1307
20 June Technology 48304.5830
2 April Technology 47630.2750
3 August Furniture 44884.0450
25 May Office Supplies 43021.5260
16 July Office Supplies 43007.0590
0 April Furniture 40698.5836
13 January Office Supplies 33233.8310
12 January Furniture 31569.2416
14 January Technology 30121.7630
11 February Technology 23343.4180
10 February Office Supplies 20641.9130
9 February Furniture 15765.9204

An Interactive and Insightful View of Our Top Categories by Sales Revenue

In [22]:
import plotly.express as px

df = sales_per_category
fig = px.bar(df, y='Sales_Revenue', x='CategoryName', text='Sales_Revenue', hover_data=['Purchase_Month'])
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(barmode='stack',uniformtext_minsize=8, uniformtext_mode='hide')
fig.show()
In [137]:
df  = pd.read_csv('Sample store final.csv', error_bad_lines=False, engine='python')
df
Out[137]:
Row_ID Order_ID Order_Date Ship_Date Ship_Mode Customer_ID Customer_Name Segment Country City ... Order_Time Month_of_Order Week_of_Year_for_Order Quarter_of_Year_for_Order Year_of_shipping_date Quarter_of_shipping_date Week_of_year_of_shipment Month_of_shipment Day_of_shipment Day_of_order
0 7981 CA-2014-103800 1/3/2014 0:00 1/7/2014 0:00 Standard Class DP-13000 Darren Powers Consumer United States Houston ... 12:00:00 AM January 1 1 2014 1 2 January Tuesday Friday
1 740 CA-2014-112326 1/4/2014 0:00 1/8/2014 0:00 Standard Class PO-19195 Phillina Ober Home Office United States Naperville ... 12:00:00 AM January 1 1 2014 1 2 January Wednesday Saturday
2 741 CA-2014-112326 1/4/2014 0:00 1/8/2014 0:00 Standard Class PO-19195 Phillina Ober Home Office United States Naperville ... 12:00:00 AM January 1 1 2014 1 2 January Wednesday Saturday
3 742 CA-2014-112326 1/4/2014 0:00 1/8/2014 0:00 Standard Class PO-19195 Phillina Ober Home Office United States Naperville ... 12:00:00 AM January 1 1 2014 1 2 January Wednesday Saturday
4 1760 CA-2014-141817 1/5/2014 0:00 1/12/2014 0:00 Standard Class MB-18085 Mick Brown Consumer United States Philadelphia ... 12:00:00 AM January 2 1 2014 1 3 January Sunday Sunday
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
9989 908 CA-2017-143259 12/30/2017 0:00 1/3/2018 0:00 Standard Class PO-18865 Patrick O'Donnell Consumer United States New York City ... 12:00:00 AM December 52 4 2018 1 1 January Wednesday Saturday
9990 909 CA-2017-143259 12/30/2017 0:00 1/3/2018 0:00 Standard Class PO-18865 Patrick O'Donnell Consumer United States New York City ... 12:00:00 AM December 52 4 2018 1 1 January Wednesday Saturday
9991 1297 CA-2017-115427 12/30/2017 0:00 1/3/2018 0:00 Standard Class EB-13975 Erica Bern Corporate United States Fairfield ... 12:00:00 AM December 52 4 2018 1 1 January Wednesday Saturday
9992 1298 CA-2017-115427 12/30/2017 0:00 1/3/2018 0:00 Standard Class EB-13975 Erica Bern Corporate United States Fairfield ... 12:00:00 AM December 52 4 2018 1 1 January Wednesday Saturday
9993 5092 CA-2017-156720 12/30/2017 0:00 1/3/2018 0:00 Standard Class JM-15580 Jill Matthias Consumer United States Loveland ... 12:00:00 AM December 52 4 2018 1 1 January Wednesday Saturday

9994 rows × 32 columns

In [25]:
sales_per_category = df.groupby(['Month_of_Order', 'CategoryName'], as_index=False).Sales.sum()
sales_per_category = sales_per_category.sort_values(by=['Sales'], ascending=False)
sales_per_category.columns = ['Purchase_Month','CategoryName', 'Sales_Revenue']
sales_per_category
Out[25]:
Purchase_Month CategoryName Sales_Revenue
29 November Technology 131134.8410
6 December Furniture 121817.9675
27 November Furniture 121286.0490
33 September Furniture 106380.5907
7 December Office Supplies 102642.6440
34 September Office Supplies 101888.6460
8 December Technology 100832.8920
28 November Office Supplies 100040.1810
35 September Technology 99380.7090
23 March Technology 97851.5700
32 October Technology 87031.9450
26 May Technology 63642.1550
4 August Office Supplies 62134.1620
30 October Furniture 58087.5157
22 March Office Supplies 56385.6430
31 October Office Supplies 55203.5240
17 July Technology 54854.0260
18 June Furniture 52999.4633
5 August Technology 52025.8560
19 June Office Supplies 51414.6330
21 March Furniture 50768.2758
1 April Office Supplies 49433.2700
15 July Furniture 49377.0120
24 May Furniture 48365.1307
20 June Technology 48304.5830
2 April Technology 47630.2750
3 August Furniture 44884.0450
25 May Office Supplies 43021.5260
16 July Office Supplies 43007.0590
0 April Furniture 40698.5836
13 January Office Supplies 33233.8310
12 January Furniture 31569.2416
14 January Technology 30121.7630
11 February Technology 23343.4180
10 February Office Supplies 20641.9130
9 February Furniture 15765.9204
In [26]:
sales_per_category = df.groupby(['Month_of_Order', 'Product_Name'], as_index=False).Sales.sum()
sales_per_category = sales_per_category.sort_values(by=['Sales'], ascending=False)
sales_per_category.columns = ['Purchase_Month','Product_Name', 'Sales_Revenue']
sales_per_category
Out[26]:
Purchase_Month Product_Name Sales_Revenue
6178 October Canon imageCLASS 2200 Advanced Copier 28699.918
4019 March Cisco TelePresence System EX90 Videoconferenci... 22638.480
4004 March Canon imageCLASS 2200 Advanced Copier 13999.960
1471 December GBC Ibimaster 500 Manual ProClick Binding System 11490.798
7186 September Lexmark MX611dhe Monochrome Laser Printer 11219.934
... ... ... ...
6408 October Maxell 4.7GB DVD-R 5/Pack 0.990
3356 June Avery Triangle Shaped Sheet Lifters, Black, 2/... 0.984
6711 September Acco 3-Hole Punch 0.876
3343 June Avery Durable Slant Ring Binders With Label Ho... 0.836
3565 June Hoover Replacement Belt for Commercial Guardsm... 0.444

7629 rows × 3 columns

Metrics For Digital Marketing Dashboard

Brand/Category Performance Over Years

Horizontal Metrics For Dashboard (YTD FOR 2014 To 2017)

In [ ]:
 
In [37]:
total_rev_year = df.groupby(['Year_of_Order'], as_index=False).Sales.sum()
#total_rev_year = total_rev_year[total_rev_year['Year_of_Order']== 2018]
total_rev_year
Out[37]:
Year_of_Order Sales
0 2014 484247.4981
1 2015 470532.5090
2 2016 609205.5980
3 2017 733215.2552
In [38]:
total_orders = df.groupby(['Year_of_Order'], as_index=False).Order_ID.nunique()
total_orders
Out[38]:
0     969
1    1038
2    1315
3    1687
Name: Order_ID, dtype: int64
In [39]:
total_category = df.Product_Name.nunique()
total_category
Out[39]:
1850
In [40]:
total_customers = df.Customer_ID.nunique()
total_customers
Out[40]:
793
In [41]:
df['Year_of_Order'].value_counts()
Out[41]:
2017    3312
2016    2587
2015    2102
2014    1993
Name: Year_of_Order, dtype: int64

Vertical Metrics For Dashboard

In [42]:
import plotly.graph_objects as go

df = df[df.Year_of_Order == 2014]
sales_per_category = df.groupby(['CategoryName'], as_index=False).Sales.sum()
sales_per_category = sales_per_category.sort_values(by=['Sales'], ascending=False)
sales_per_category.columns = ['Product Category', 'Sales_Revenue']

sales_per_category = sales_per_category[:20]
labels = sales_per_category['Product Category']
values = sales_per_category['Sales_Revenue']

# Use `hole` to create a donut-like pie chart
fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.3)])
fig.show()
In [117]:
df  = pd.read_csv('Sample store final.csv', error_bad_lines=False, engine='python')
df
Out[117]:
Row_ID Order_ID Order_Date Ship_Date Ship_Mode Customer_ID Customer_Name Segment Country City ... Order_Time Month_of_Order Week_of_Year_for_Order Quarter_of_Year_for_Order Year_of_shipping_date Quarter_of_shipping_date Week_of_year_of_shipment Month_of_shipment Day_of_shipment Day_of_order
0 7981 CA-2014-103800 1/3/2014 0:00 1/7/2014 0:00 Standard Class DP-13000 Darren Powers Consumer United States Houston ... 12:00:00 AM January 1 1 2014 1 2 January Tuesday Friday
1 740 CA-2014-112326 1/4/2014 0:00 1/8/2014 0:00 Standard Class PO-19195 Phillina Ober Home Office United States Naperville ... 12:00:00 AM January 1 1 2014 1 2 January Wednesday Saturday
2 741 CA-2014-112326 1/4/2014 0:00 1/8/2014 0:00 Standard Class PO-19195 Phillina Ober Home Office United States Naperville ... 12:00:00 AM January 1 1 2014 1 2 January Wednesday Saturday
3 742 CA-2014-112326 1/4/2014 0:00 1/8/2014 0:00 Standard Class PO-19195 Phillina Ober Home Office United States Naperville ... 12:00:00 AM January 1 1 2014 1 2 January Wednesday Saturday
4 1760 CA-2014-141817 1/5/2014 0:00 1/12/2014 0:00 Standard Class MB-18085 Mick Brown Consumer United States Philadelphia ... 12:00:00 AM January 2 1 2014 1 3 January Sunday Sunday
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
9989 908 CA-2017-143259 12/30/2017 0:00 1/3/2018 0:00 Standard Class PO-18865 Patrick O'Donnell Consumer United States New York City ... 12:00:00 AM December 52 4 2018 1 1 January Wednesday Saturday
9990 909 CA-2017-143259 12/30/2017 0:00 1/3/2018 0:00 Standard Class PO-18865 Patrick O'Donnell Consumer United States New York City ... 12:00:00 AM December 52 4 2018 1 1 January Wednesday Saturday
9991 1297 CA-2017-115427 12/30/2017 0:00 1/3/2018 0:00 Standard Class EB-13975 Erica Bern Corporate United States Fairfield ... 12:00:00 AM December 52 4 2018 1 1 January Wednesday Saturday
9992 1298 CA-2017-115427 12/30/2017 0:00 1/3/2018 0:00 Standard Class EB-13975 Erica Bern Corporate United States Fairfield ... 12:00:00 AM December 52 4 2018 1 1 January Wednesday Saturday
9993 5092 CA-2017-156720 12/30/2017 0:00 1/3/2018 0:00 Standard Class JM-15580 Jill Matthias Consumer United States Loveland ... 12:00:00 AM December 52 4 2018 1 1 January Wednesday Saturday

9994 rows × 32 columns

Total Sales By Category For Individual Year

In [132]:
import plotly.graph_objects as go

df = df[df.Year_of_Order == 2015]
sales_per_category = df.groupby(['CategoryName'], as_index=False).Sales.sum()
sales_per_category = sales_per_category.sort_values(by=['Sales'], ascending=False)
sales_per_category.columns = ['Product Category', 'Sales_Revenue']

sales_per_category = sales_per_category[:20]
labels = sales_per_category['Product Category']
values = sales_per_category['Sales_Revenue']

# Use `hole` to create a donut-like pie chart
fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.3)])
fig.show()
In [44]:
import plotly.graph_objects as go

df = df[df.Year_of_Order == 2017]
sales_per_category = df.groupby(['CategoryName'], as_index=False).Sales.sum()
sales_per_category = sales_per_category.sort_values(by=['Sales'], ascending=False)
sales_per_category.columns = ['Product Category', 'Sales_Revenue']

sales_per_category = sales_per_category[:20]
labels = sales_per_category['Product Category']
values = sales_per_category['Sales_Revenue']

# Use `hole` to create a donut-like pie chart
fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.3)])
fig.show()
In [176]:
import plotly.graph_objects as go

df = df[df.Year_of_Order == 2016]
sales_per_category = df.groupby(['CategoryName'], as_index=False).Sales.sum()
sales_per_category = sales_per_category.sort_values(by=['Sales'], ascending=False)
sales_per_category.columns = ['Product Category', 'Sales Revenue']

sales_per_category = sales_per_category[:20]
labels = sales_per_category['Product Category']
values = sales_per_category['Sales Revenue']

# Use `hole` to create a donut-like pie chart
fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.3)])
fig.show()
In [127]:
df  = pd.read_csv('Sample store final.csv', error_bad_lines=False, engine='python')
df
Out[127]:
Row_ID Order_ID Order_Date Ship_Date Ship_Mode Customer_ID Customer_Name Segment Country City ... Order_Time Month_of_Order Week_of_Year_for_Order Quarter_of_Year_for_Order Year_of_shipping_date Quarter_of_shipping_date Week_of_year_of_shipment Month_of_shipment Day_of_shipment Day_of_order
0 7981 CA-2014-103800 1/3/2014 0:00 1/7/2014 0:00 Standard Class DP-13000 Darren Powers Consumer United States Houston ... 12:00:00 AM January 1 1 2014 1 2 January Tuesday Friday
1 740 CA-2014-112326 1/4/2014 0:00 1/8/2014 0:00 Standard Class PO-19195 Phillina Ober Home Office United States Naperville ... 12:00:00 AM January 1 1 2014 1 2 January Wednesday Saturday
2 741 CA-2014-112326 1/4/2014 0:00 1/8/2014 0:00 Standard Class PO-19195 Phillina Ober Home Office United States Naperville ... 12:00:00 AM January 1 1 2014 1 2 January Wednesday Saturday
3 742 CA-2014-112326 1/4/2014 0:00 1/8/2014 0:00 Standard Class PO-19195 Phillina Ober Home Office United States Naperville ... 12:00:00 AM January 1 1 2014 1 2 January Wednesday Saturday
4 1760 CA-2014-141817 1/5/2014 0:00 1/12/2014 0:00 Standard Class MB-18085 Mick Brown Consumer United States Philadelphia ... 12:00:00 AM January 2 1 2014 1 3 January Sunday Sunday
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
9989 908 CA-2017-143259 12/30/2017 0:00 1/3/2018 0:00 Standard Class PO-18865 Patrick O'Donnell Consumer United States New York City ... 12:00:00 AM December 52 4 2018 1 1 January Wednesday Saturday
9990 909 CA-2017-143259 12/30/2017 0:00 1/3/2018 0:00 Standard Class PO-18865 Patrick O'Donnell Consumer United States New York City ... 12:00:00 AM December 52 4 2018 1 1 January Wednesday Saturday
9991 1297 CA-2017-115427 12/30/2017 0:00 1/3/2018 0:00 Standard Class EB-13975 Erica Bern Corporate United States Fairfield ... 12:00:00 AM December 52 4 2018 1 1 January Wednesday Saturday
9992 1298 CA-2017-115427 12/30/2017 0:00 1/3/2018 0:00 Standard Class EB-13975 Erica Bern Corporate United States Fairfield ... 12:00:00 AM December 52 4 2018 1 1 January Wednesday Saturday
9993 5092 CA-2017-156720 12/30/2017 0:00 1/3/2018 0:00 Standard Class JM-15580 Jill Matthias Consumer United States Loveland ... 12:00:00 AM December 52 4 2018 1 1 January Wednesday Saturday

9994 rows × 32 columns

In [156]:
total_rev_month = df.groupby(['Year_of_Order', 'Month_of_Order', 'CategoryName'], as_index=False).Sales.sum()
total_rev_month.columns = ['Sales Year','Sales Month','Product Category' , 'Sales Revenue']
total_rev_month
Out[156]:
Sales Year Sales Month Product Category Sales Revenue
0 2014 April Furniture 7944.8370
1 2014 April Office Supplies 11155.0740
2 2014 April Technology 9195.4340
3 2014 August Furniture 7320.3465
4 2014 August Office Supplies 11379.4620
... ... ... ... ...
139 2017 October Office Supplies 23037.1920
140 2017 October Technology 32855.6630
141 2017 September Furniture 29028.2060
142 2017 September Office Supplies 31895.8430
143 2017 September Technology 26942.6030

144 rows × 4 columns

Category Sales Across Years

In [164]:
import plotly.express as px
import numpy as np
df = total_rev_month
fig = px.sunburst(df, path=['Sales Year', 'Product Category'], values='Sales Revenue',
                  color='Sales Revenue', hover_data=['Sales Revenue'],
                  color_continuous_scale='RdBu',
                  color_continuous_midpoint=np.average(df['Sales Revenue'], weights=df['Sales Revenue']))
fig.show()
In [163]:
total_profit_month = df.groupby(['Year_of_Order', 'Month_of_Order', 'CategoryName'], as_index=False).Profit.sum()
#total_rev_month = total_rev_month.sort_values(by=['order_purchase_year'], ascending=True)
total_profit_month.columns = ['Profit for Year','Sales Month','Product Category' , 'Profit']
total_profit_month
Out[163]:
Profit for Year Sales Month Product Category Profit
0 2014 April Furniture 554.3801
1 2014 April Office Supplies 1000.2086
2 2014 April Technology 1934.2465
3 2014 August Furniture 663.4717
4 2014 August Office Supplies 2767.6007
... ... ... ... ...
139 2017 October Office Supplies 4040.7751
140 2017 October Technology 7761.4207
141 2017 September Furniture 1548.6837
142 2017 September Office Supplies 4847.7438
143 2017 September Technology 4595.1281

144 rows × 4 columns

In [53]:
import plotly.express as px
import numpy as np
df = total_profit_month
fig = px.sunburst(df, path=['Profit for Year', 'Product Category'], values='Profit',
                  color='Profit', hover_data=['Profit'],
                  color_continuous_scale='RdBu',
                  color_continuous_midpoint=np.average(df['Profit'], weights=df['Profit']))
fig.show()

Let's Draw Insights From Our Weekday Sales For Each Individual Year

In [159]:
total_weekday_rev = df[df['Year_of_Order'] == 2014].groupby(['Day_of_order', 'CategoryName'], as_index=False).Sales.sum()
#total_rev_month = total_rev_month.sort_values(by=['order_purchase_year'], ascending=True)
total_weekday_rev.columns = ['Day_of_order','Product Category' , 'Sales Revenue']
total_weekday_rev
Out[159]:
Day_of_order Product Category Sales Revenue
0 Friday Furniture 29898.5326
1 Friday Office Supplies 24843.0840
2 Friday Technology 29865.7490
3 Monday Furniture 43010.1400
4 Monday Office Supplies 32885.2530
5 Monday Technology 26629.2940
6 Saturday Furniture 22099.8520
7 Saturday Office Supplies 25744.4840
8 Saturday Technology 22875.1970
9 Sunday Furniture 18006.1435
10 Sunday Office Supplies 19008.7760
11 Sunday Technology 23533.2740
12 Thursday Furniture 775.8068
13 Thursday Office Supplies 877.3960
14 Thursday Technology 4443.2780
15 Tuesday Furniture 28805.9280
16 Tuesday Office Supplies 36555.3770
17 Tuesday Technology 47387.5460
18 Wednesday Furniture 14596.4502
19 Wednesday Office Supplies 11862.0420
20 Wednesday Technology 20543.8950
In [160]:
#Year 2014
labels = total_weekday_rev['Day_of_order']
values = total_weekday_rev['Sales Revenue']

# Use `hole` to create a donut-like pie chart
fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.3)])
fig.show()
In [ ]:
 
In [161]:
df = total_weekday_rev
fig = px.sunburst(df, path=['Day_of_order', 'Product Category'], values='Sales Revenue',
                  color='Sales Revenue', hover_data=['Product Category'],
                  color_continuous_scale='RdBu',
                  color_continuous_midpoint=np.average(df['Sales Revenue'], weights=df['Sales Revenue']))
fig.show()
In [166]:
total_weekday_rev = df[df['Year_of_Order'] == 2015].groupby(['Day_of_order', 'CategoryName'], as_index=False).Sales.sum()
#total_rev_month = total_rev_month.sort_values(by=['order_purchase_year'], ascending=True)
total_weekday_rev.columns = ['Day of Order','Product Category' , 'Sales Revenue']
total_weekday_rev
Out[166]:
Day of Order Product Category Sales Revenue
0 Friday Furniture 23028.0777
1 Friday Office Supplies 22366.9780
2 Friday Technology 39778.1480
3 Monday Furniture 31682.2280
4 Monday Office Supplies 30179.4910
5 Monday Technology 29149.6380
6 Saturday Furniture 17831.9494
7 Saturday Office Supplies 22790.9930
8 Saturday Technology 21769.4900
9 Sunday Furniture 36397.5516
10 Sunday Office Supplies 18607.2400
11 Sunday Technology 29257.6110
12 Thursday Furniture 37144.8033
13 Thursday Office Supplies 22396.2990
14 Thursday Technology 24008.7640
15 Tuesday Furniture 19865.3330
16 Tuesday Office Supplies 19340.8280
17 Tuesday Technology 15669.5580
18 Wednesday Furniture 4568.2940
19 Wednesday Office Supplies 1551.6340
20 Wednesday Technology 3147.6000
In [167]:
#Year 2015
labels = total_weekday_rev['Day of Order']
values = total_weekday_rev['Sales Revenue']

# Use `hole` to create a donut-like pie chart
fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.3)])
fig.show()
In [171]:
df = total_weekday_rev
fig = px.sunburst(df, path=['Day of Order', 'Product Category'], values='Sales Revenue',
                  color='Sales Revenue', hover_data=['Product Category'],
                  color_continuous_scale='RdBu',
                  color_continuous_midpoint=np.average(df['Sales Revenue'], weights=df['Sales Revenue']))
fig.show()
In [173]:
total_weekday_rev = df[df['Year_of_Order'] == 2016].groupby(['Day_of_order', 'CategoryName'], as_index=False).Sales.sum()
#total_rev_month = total_rev_month.sort_values(by=['order_purchase_year'], ascending=True)
total_weekday_rev.columns = ['Day of Order','Product Category' , 'Sales Revenue']
total_weekday_rev
Out[173]:
Day of Order Product Category Sales Revenue
0 Friday Furniture 39789.9775
1 Friday Office Supplies 35635.8630
2 Friday Technology 39641.2550
3 Monday Furniture 36911.3115
4 Monday Office Supplies 27403.0910
5 Monday Technology 36573.1830
6 Saturday Furniture 30976.9518
7 Saturday Office Supplies 33047.6930
8 Saturday Technology 33896.4130
9 Sunday Furniture 31654.6168
10 Sunday Office Supplies 35099.8730
11 Sunday Technology 51073.6840
12 Thursday Furniture 35316.4426
13 Thursday Office Supplies 33696.6210
14 Thursday Technology 30071.8340
15 Tuesday Furniture 21919.3658
16 Tuesday Office Supplies 15444.5790
17 Tuesday Technology 32402.0690
18 Wednesday Furniture 2332.7700
19 Wednesday Office Supplies 3612.2620
20 Wednesday Technology 2705.7420
In [175]:
df = total_weekday_rev
fig = px.sunburst(df, path=['Day of Order', 'Product Category'], values='Sales Revenue',
                  color='Sales Revenue', hover_data=['Product Category'],
                  color_continuous_scale='RdBu',
                  color_continuous_midpoint=np.average(df['Sales Revenue'], weights=df['Sales Revenue']))
fig.show()
In [178]:
total_weekday_rev = df[df['Year_of_Order'] == 2017].groupby(['Day_of_order', 'CategoryName'], as_index=False).Sales.sum()
#total_rev_month = total_rev_month.sort_values(by=['order_purchase_year'], ascending=True)
total_weekday_rev.columns = ['Day of Order','Product Category' , 'Sales Revenue']
total_weekday_rev
Out[178]:
Day of Order Product Category Sales Revenue
0 Friday Furniture 42940.6595
1 Friday Office Supplies 51147.3530
2 Friday Technology 48756.5820
3 Monday Furniture 48222.9232
4 Monday Office Supplies 46030.8700
5 Monday Technology 40260.3930
6 Saturday Furniture 31649.8748
7 Saturday Office Supplies 41986.8600
8 Saturday Technology 53314.9250
9 Sunday Furniture 32370.1845
10 Sunday Office Supplies 38636.0220
11 Sunday Technology 51708.8700
12 Thursday Furniture 33842.8408
13 Thursday Office Supplies 45977.6760
14 Thursday Technology 53414.0170
15 Tuesday Furniture 15241.7184
16 Tuesday Office Supplies 18313.2900
17 Tuesday Technology 16263.9100
18 Wednesday Furniture 11119.0680
19 Wednesday Office Supplies 4005.1040
20 Wednesday Technology 8012.1140
In [179]:
#Year 2017
labels = total_weekday_rev['Day of Order']
values = total_weekday_rev['Sales Revenue']

# Use `hole` to create a donut-like pie chart
fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.3)])
fig.show()
In [182]:
df = total_weekday_rev
fig = px.sunburst(df, path=['Day of Order', 'Product Category'], values='Sales Revenue',
                  color='Sales Revenue', hover_data=['Product Category'],
                  color_continuous_scale='RdBu',
                  color_continuous_midpoint=np.average(df['Sales Revenue'], weights=df['Sales Revenue']))
fig.show()

Category Activity Alongside Total Purchases

In [69]:
cat_activity = df[df['Year_of_Order'] == 2015].groupby(['CategoryName']).Order_ID.nunique()
#cat_activity = cat_activity.sort_values(['Activity'], ascending=True)
cat_activity = pd.DataFrame(cat_activity)
cat_activity.columns = [ 'Activity']
cat_activity = cat_activity.sort_values(by=['Activity'], ascending=False)
cat_activity
Out[69]:
Activity
CategoryName
Office Supplies 778
Furniture 371
Technology 338
In [71]:
#Year 2015
fig = px.funnel_area(names=cat_activity.index,
                    values=cat_activity.Activity)
fig.show()
In [72]:
cat_activity = df[df['Year_of_Order'] == 2016].groupby(['CategoryName']).Order_ID.nunique()
#cat_activity = cat_activity.sort_values(['Activity'], ascending=True)
cat_activity = pd.DataFrame(cat_activity)
cat_activity.columns = [ 'Activity']
cat_activity = cat_activity.sort_values(by=['Activity'], ascending=False)
cat_activity
Out[72]:
Activity
CategoryName
Office Supplies 946
Furniture 476
Technology 389
In [73]:
#Year 2016
fig = px.funnel_area(names=cat_activity.index,
                    values=cat_activity.Activity)
fig.show()
In [74]:
cat_activity = df[df['Year_of_Order'] == 2014].groupby(['CategoryName']).Order_ID.nunique()
#cat_activity = cat_activity.sort_values(['Activity'], ascending=True)
cat_activity = pd.DataFrame(cat_activity)
cat_activity.columns = [ 'Activity']
cat_activity = cat_activity.sort_values(by=['Activity'], ascending=False)
cat_activity
Out[74]:
Activity
CategoryName
Office Supplies 746
Furniture 353
Technology 291
In [75]:
#Year 2014
fig = px.funnel_area(names=cat_activity.index,
                    values=cat_activity.Activity)
fig.show()
In [76]:
cat_activity = df[df['Year_of_Order'] == 2017].groupby(['CategoryName']).Order_ID.nunique()
#cat_activity = cat_activity.sort_values(['Activity'], ascending=True)
cat_activity = pd.DataFrame(cat_activity)
cat_activity.columns = [ 'Activity']
cat_activity = cat_activity.sort_values(by=['Activity'], ascending=False)
cat_activity
Out[76]:
Activity
CategoryName
Office Supplies 1272
Furniture 564
Technology 526
In [77]:
#Year 2014
fig = px.funnel_area(names=cat_activity.index,
                    values=cat_activity.Activity)
fig.show()
In [78]:
cat_activity = df[df['Year_of_Order'] == 2014].groupby(['Region']).Sales.nunique()
#cat_activity = cat_activity.sort_values(['Activity'], ascending=True)
cat_activity = pd.DataFrame(cat_activity)
cat_activity.columns = [ 'Activity']
cat_activity = cat_activity.sort_values(by=['Activity'], ascending=False)
cat_activity
Out[78]:
Activity
Region
West 608
East 493
Central 443
South 328
In [79]:
cat_activity = df[df['Year_of_Order'] == 2015].groupby(['Region']).Sales.nunique()
#cat_activity = cat_activity.sort_values(['Activity'], ascending=True)
cat_activity = pd.DataFrame(cat_activity)
cat_activity.columns = [ 'Activity']
cat_activity = cat_activity.sort_values(by=['Activity'], ascending=False)
cat_activity
Out[79]:
Activity
Region
West 587
East 586
Central 452
South 326
In [80]:
#Year 2015
fig = px.funnel_area(names=cat_activity.index,
                    values=cat_activity.Activity)
fig.show()
In [81]:
cat_activity = df[df['Year_of_Order'] == 2016].groupby(['Region']).Sales.nunique()
#cat_activity = cat_activity.sort_values(['Activity'], ascending=True)
cat_activity = pd.DataFrame(cat_activity)
cat_activity.columns = [ 'Activity']
cat_activity = cat_activity.sort_values(by=['Activity'], ascending=False)
cat_activity
Out[81]:
Activity
Region
West 716
East 701
Central 565
South 393
In [82]:
#Year 2016
fig = px.funnel_area(names=cat_activity.index,
                    values=cat_activity.Activity)
fig.show()
In [83]:
cat_activity = df[df['Year_of_Order'] == 2017].groupby(['Region']).Sales.nunique()
#cat_activity = cat_activity.sort_values(['Activity'], ascending=True)
cat_activity = pd.DataFrame(cat_activity)
cat_activity.columns = [ 'Activity']
cat_activity = cat_activity.sort_values(by=['Activity'], ascending=False)
cat_activity
Out[83]:
Activity
Region
West 952
East 844
Central 711
South 481
In [84]:
#Year 2017
fig = px.funnel_area(names=cat_activity.index,
                    values=cat_activity.Activity)
fig.show()
In [85]:
cat_activity = df[df['Year_of_Order'] == 2014].groupby(['State']).Order_ID.nunique()
#cat_activity = cat_activity.sort_values(['Activity'], ascending=True)
cat_activity = pd.DataFrame(cat_activity)
cat_activity.columns = [ 'Activity']
cat_activity = cat_activity.sort_values(by=['Activity'], ascending=False)
cat_activity
Out[85]:
Activity
State
California 197
New York 107
Texas 99
Illinois 61
Pennsylvania 53
Ohio 50
Washington 45
Florida 41
Virginia 31
Arizona 25
North Carolina 24
Michigan 22
Georgia 15
Tennessee 13
Colorado 12
Delaware 11
New Jersey 11
Kentucky 11
Oregon 11
Indiana 11
Massachusetts 10
Minnesota 9
Wisconsin 8
Connecticut 8
Utah 8
Alabama 7
Arkansas 7
Missouri 6
Nevada 6
South Carolina 6
Louisiana 5
Mississippi 5
Maryland 5
New Mexico 4
Nebraska 4
Oklahoma 4
Kansas 3
Rhode Island 3
Montana 3
South Dakota 2
Idaho 2
New Hampshire 2
Maine 1
Iowa 1
In [86]:
 #Year 2014
fig = px.funnel_area(names=cat_activity.index,
                    values=cat_activity.Activity)
fig.show()
In [87]:
cat_activity = df[df['Year_of_Order'] == 2015].groupby(['State']).Order_ID.nunique()
#cat_activity = cat_activity.sort_values(['Activity'], ascending=True)
cat_activity = pd.DataFrame(cat_activity)
cat_activity.columns = [ 'Activity']
cat_activity = cat_activity.sort_values(by=['Activity'], ascending=False)
cat_activity
Out[87]:
Activity
State
California 205
New York 126
Texas 102
Pennsylvania 65
Illinois 48
Washington 47
Florida 42
Ohio 35
Michigan 31
North Carolina 28
Virginia 28
Arizona 26
Colorado 20
Massachusetts 20
Tennessee 19
Georgia 19
Indiana 17
Oregon 15
Kentucky 14
Delaware 13
Maryland 10
Wisconsin 9
Minnesota 8
New Jersey 7
Utah 7
Missouri 7
Connecticut 7
Nevada 7
Alabama 6
New Hampshire 6
New Mexico 5
Nebraska 4
Louisiana 4
South Carolina 4
Arkansas 4
Montana 3
Oklahoma 3
Rhode Island 3
Iowa 3
Idaho 3
Kansas 2
Vermont 2
Mississippi 2
Maine 1
District of Columbia 1
In [88]:
 #Year 2015
fig = px.funnel_area(names=cat_activity.index,
                    values=cat_activity.Activity)
fig.show()
In [89]:
cat_activity = df[df['Year_of_Order'] == 2016].groupby(['State']).Order_ID.nunique()
#cat_activity = cat_activity.sort_values(['Activity'], ascending=True)
cat_activity = pd.DataFrame(cat_activity)
cat_activity.columns = [ 'Activity']
cat_activity = cat_activity.sort_values(by=['Activity'], ascending=False)
cat_activity
Out[89]:
Activity
State
California 275
New York 155
Texas 122
Pennsylvania 71
Washington 68
Illinois 67
Ohio 65
Florida 47
North Carolina 36
Virginia 32
Michigan 30
Arizona 25
Georgia 25
Tennessee 25
Colorado 23
New Jersey 21
Indiana 20
Wisconsin 20
Oklahoma 16
Oregon 15
Maryland 12
Kentucky 12
Connecticut 12
Mississippi 12
Delaware 11
Alabama 10
Massachusetts 10
Rhode Island 8
Arkansas 7
Missouri 7
Minnesota 7
Iowa 6
Nevada 6
Kansas 5
Nebraska 5
Utah 4
New Hampshire 4
South Carolina 4
Louisiana 4
New Mexico 3
Vermont 2
Idaho 2
District of Columbia 2
Maine 1
Wyoming 1
In [90]:
 #Year 2016
fig = px.funnel_area(names=cat_activity.index,
                    values=cat_activity.Activity)
fig.show()
In [91]:
cat_activity = df[df['Year_of_Order'] == 2017].groupby(['State']).Order_ID.nunique()
#cat_activity = cat_activity.sort_values(['Activity'], ascending=True)
cat_activity = pd.DataFrame(cat_activity)
cat_activity.columns = [ 'Activity']
cat_activity = cat_activity.sort_values(by=['Activity'], ascending=False)
cat_activity
Out[91]:
Activity
State
California 344
New York 174
Texas 164
Illinois 100
Pennsylvania 99
Washington 96
Ohio 86
Florida 70
North Carolina 48
Michigan 34
Tennessee 34
Arizona 32
Georgia 32
Indiana 25
Kentucky 24
Colorado 24
Virginia 24
Massachusetts 22
New Jersey 22
Minnesota 20
Maryland 19
Connecticut 18
Wisconsin 16
Oregon 15
Oklahoma 11
Rhode Island 11
Alabama 11
New Mexico 10
Nebraska 10
Missouri 10
Arkansas 9
Delaware 9
Louisiana 8
Mississippi 7
Utah 7
Iowa 7
South Carolina 6
New Hampshire 5
Idaho 4
Kansas 4
Nevada 4
South Dakota 3
Vermont 2
North Dakota 2
West Virginia 2
Montana 2
District of Columbia 1
In [92]:
 #Year 2016
fig = px.funnel_area(names=cat_activity.index,
                    values=cat_activity.Activity)
fig.show()
In [93]:
cat_activity = df[df['Year_of_Order'] == 2014].groupby(['City']).Order_ID.nunique()
#cat_activity = cat_activity.sort_values(['Activity'], ascending=True)
cat_activity = pd.DataFrame(cat_activity)
cat_activity.columns = [ 'Activity']
cat_activity = cat_activity.sort_values(by=['Activity'], ascending=False)
cat_activity
Out[93]:
Activity
City
New York City 77
Los Angeles 68
San Francisco 59
Philadelphia 49
Houston 40
... ...
Naperville 1
Nashville 1
Des Plaines 1
Delray Beach 1
Palm Coast 1

268 rows × 1 columns

In [94]:
total_year_rev = df[df['Year_of_Order'] == 2014].groupby(['Month_of_Order', 'CategoryName'], as_index=False).Sales.sum()
#total_rev_month = total_rev_month.sort_values(by=['order_purchase_year'], ascending=True)
total_year_rev.columns = ['Month_of_order','CategoryName' , 'Sales Revenue']
total_year_rev
Out[94]:
Month_of_order CategoryName Sales Revenue
0 April Furniture 7944.8370
1 April Office Supplies 11155.0740
2 April Technology 9195.4340
3 August Furniture 7320.3465
4 August Office Supplies 11379.4620
5 August Technology 9209.6600
6 December Furniture 30645.9665
7 December Office Supplies 18006.4620
8 December Technology 20893.1920
9 February Furniture 1839.6580
10 February Office Supplies 1071.7240
11 February Technology 1608.5100
12 January Furniture 6242.5250
13 January Office Supplies 4851.0800
14 January Technology 3143.2900
15 July Furniture 10821.0510
16 July Office Supplies 15121.2080
17 July Technology 8004.1340
18 June Furniture 13206.1256
19 June Office Supplies 12953.0370
20 June Technology 8435.9650
21 March Furniture 14573.9560
22 March Office Supplies 8605.8790
23 March Technology 32511.1740
24 May Furniture 6912.7870
25 May Office Supplies 7135.6240
26 May Technology 9599.8760
27 November Furniture 21564.8727
28 November Office Supplies 26862.4360
29 November Technology 30201.4080
30 October Furniture 12304.2470
31 October Office Supplies 7211.1280
32 October Technology 11938.0180
33 September Furniture 23816.4808
34 September Office Supplies 27423.2980
35 September Technology 30537.5720
In [95]:
#Year 2014
df = total_year_rev
fig = px.sunburst(df, path=['Sales Revenue',  'CategoryName'], values='Sales Revenue',
                  color='Sales Revenue', hover_data=['CategoryName'],
                  color_continuous_scale='RdBu',
                  color_continuous_midpoint=np.average(df['Sales Revenue'], weights=df['Sales Revenue']))
fig.show()
In [105]:
total_year_rev = df[df['Year_of_Order'] == 2015].groupby(['Month_of_Order', 'CategoryName'], as_index=False).Sales.sum()
#total_rev_month = total_rev_month.sort_values(by=['order_purchase_year'], ascending=True)
total_year_rev.columns = ['Month_of_order','CategoryName' , 'Sales Revenue']
total_year_rev
Out[105]:
Month_of_order CategoryName Sales Revenue
0 April Furniture 10475.6985
1 April Office Supplies 12558.5580
2 April Technology 11160.9520
3 August Furniture 9638.5922
4 August Office Supplies 11735.1080
5 August Technology 15524.6320
6 December Furniture 23085.8192
7 December Office Supplies 16201.6740
8 December Technology 35632.0280
9 February Furniture 3134.3740
10 February Office Supplies 5368.0670
11 February Technology 3448.9700
12 January Furniture 11739.9416
13 January Office Supplies 1808.7800
14 January Technology 4625.3540
15 July Furniture 13674.4200
16 July Office Supplies 4719.9390
17 July Technology 10370.9660
18 June Furniture 7714.1790
19 June Office Supplies 10647.7470
20 June Technology 6435.3660
21 March Furniture 12499.7830
22 March Office Supplies 15882.5470
23 March Technology 10343.9220
24 May Furniture 9374.9505
25 May Office Supplies 9113.7360
26 May Technology 11643.0000
27 November Furniture 30880.8325
28 November Office Supplies 21218.1300
29 November Technology 23873.6010
30 October Furniture 12026.6235
31 October Office Supplies 8673.4100
32 October Technology 10704.8900
33 September Furniture 26273.0230
34 September Office Supplies 19305.7670
35 September Technology 19017.1280

Let's Take a Glimpse at Our Sales Revenue For All Product Category For each Individual Year

In [106]:
#Year 2015
df = total_year_rev
fig = px.sunburst(df, path=['Sales Revenue',  'CategoryName'], values='Sales Revenue',
                  color='Sales Revenue', hover_data=['CategoryName'],
                  color_continuous_scale='RdBu',
                  color_continuous_midpoint=np.average(df['Sales Revenue'], weights=df['Sales Revenue']))
fig.show()
In [102]:
total_year_rev = df[df['Year_of_Order'] == 2016].groupby(['Month_of_Order', 'CategoryName'], as_index=False).Sales.sum()
#total_rev_month = total_rev_month.sort_values(by=['order_purchase_year'], ascending=True)
total_year_rev.columns = ['Month_of_order','CategoryName' , 'Sales Revenue']
total_year_rev
Out[102]:
Month_of_order CategoryName Sales Revenue
0 April Furniture 13212.0900
1 April Office Supplies 10647.4470
2 April Technology 14890.5020
3 August Furniture 12483.2323
4 August Office Supplies 8959.7400
5 August Technology 9672.4020
6 December Furniture 36678.7150
7 December Office Supplies 37997.5660
8 December Technology 22322.7620
9 February Furniture 3925.5510
10 February Office Supplies 6794.3500
11 February Technology 12258.9140
12 January Furniture 7622.7430
13 January Office Supplies 5299.6820
14 January Technology 5620.0660
15 July Furniture 13068.5190
16 July Office Supplies 12924.4440
17 July Technology 13269.0000
18 June Furniture 13070.5720
19 June Office Supplies 10901.8100
20 June Technology 16372.1520
21 March Furniture 12801.0920
22 March Office Supplies 17346.9310
23 March Technology 21567.8520
24 May Furniture 15119.8350
25 May Office Supplies 13035.2020
26 May Technology 28832.6910
27 November Furniture 31783.6288
28 November Office Supplies 20487.2780
29 November Technology 27141.0590
30 October Furniture 11872.5770
31 October Office Supplies 16281.7940
32 October Technology 31533.3740
33 September Furniture 27262.8809
34 September Office Supplies 23263.7380
35 September Technology 22883.4060
In [103]:
#Year 2016
df = total_year_rev
fig = px.sunburst(df, path=['Sales Revenue',  'CategoryName'], values='Sales Revenue',
                  color='Sales Revenue', hover_data=['CategoryName'],
                  color_continuous_scale='RdBu',
                  color_continuous_midpoint=np.average(df['Sales Revenue'], weights=df['Sales Revenue']))
fig.show()
In [125]:
total_year_rev = df[df['Year_of_Order'] == 2017].groupby(['Month_of_Order', 'CategoryName'], as_index=False).Sales.sum()
#total_rev_month = total_rev_month.sort_values(by=['order_purchase_year'], ascending=True)
total_year_rev.columns = ['Month_of_order','CategoryName' , 'Sales Revenue']
total_year_rev
Out[125]:
Month_of_order CategoryName Sales Revenue
0 April Furniture 9065.9581
1 April Office Supplies 15072.1910
2 April Technology 12383.3870
3 August Furniture 15441.8740
4 August Office Supplies 30059.8520
5 August Technology 17619.1620
6 December Furniture 31407.4668
7 December Office Supplies 30436.9420
8 December Technology 21984.9100
9 February Furniture 6866.3374
10 February Office Supplies 7407.7720
11 February Technology 6027.0240
12 January Furniture 5964.0320
13 January Office Supplies 21274.2890
14 January Technology 16733.0530
15 July Furniture 11813.0220
16 July Office Supplies 10241.4680
17 July Technology 23209.9260
18 June Furniture 19008.5867
19 June Office Supplies 16912.0390
20 June Technology 17061.1000
21 March Furniture 10893.4448
22 March Office Supplies 14550.2860
23 March Technology 33428.6220
24 May Furniture 16957.5582
25 May Office Supplies 13736.9640
26 May Technology 13566.5880
27 November Furniture 37056.7150
28 November Office Supplies 31472.3370
29 November Technology 49918.7730
30 October Furniture 21884.0682
31 October Office Supplies 23037.1920
32 October Technology 32855.6630
33 September Furniture 29028.2060
34 September Office Supplies 31895.8430
35 September Technology 26942.6030
In [126]:
#Year 2017
df = total_rev_hour
fig = px.sunburst(df, path=['Sales Revenue',  'CategoryName'], values='Sales Revenue',
                  color='Sales Revenue', hover_data=['CategoryName'],
                  color_continuous_scale='RdBu',
                  color_continuous_midpoint=np.average(df['Sales Revenue'], weights=df['Sales Revenue']))
fig.show()

Take a Peep at Profits Made For Each Individual Year

In [112]:
total_year_profit = df[df['Year_of_Order'] == 2014].groupby(['Month_of_Order', 'CategoryName'], as_index=False).Profit.sum()
total_year_profit.columns = ['Month_of_order','CategoryName' , 'Profit']
total_year_profit
Out[112]:
Month_of_order CategoryName Profit
0 April Furniture 554.3801
1 April Office Supplies 1000.2086
2 April Technology 1934.2465
3 August Furniture 663.4717
4 August Office Supplies 2767.6007
5 August Technology 1887.0326
6 December Furniture 2011.8198
7 December Office Supplies 2064.3953
8 December Technology 4907.3548
9 February Furniture 120.6917
10 February Office Supplies 176.0910
11 February Technology 565.5257
12 January Furniture 805.4665
13 January Office Supplies 788.9506
14 January Technology 855.7736
15 July Furniture 374.2467
16 July Office Supplies -2482.0172
17 July Technology 1266.2879
18 June Furniture 596.3153
19 June Office Supplies 3031.4326
20 June Technology 1348.7765
21 March Furniture -1128.6550
22 March Office Supplies 1270.4133
23 March Technology 356.9716
24 May Furniture -349.8776
25 May Office Supplies 1747.2044
26 May Technology 1341.3828
27 November Furniture -297.8958
28 November Office Supplies 4411.9560
29 November Technology 5178.0667
30 October Furniture 249.1645
31 October Office Supplies 1117.6406
32 October Technology 2081.4522
33 September Furniture 1858.5976
34 September Office Supplies 6699.5402
35 September Technology -230.0384
In [114]:
total_year_profit = df[df['Year_of_Order'] == 2014].groupby(['Month_of_Order', 'CategoryName'], as_index=False).Profit.sum()
total_year_profit.columns = ['Month_of_order','CategoryName' , 'Profit']
total_year_profit
Out[114]:
Month_of_order CategoryName Profit
0 April Furniture 554.3801
1 April Office Supplies 1000.2086
2 April Technology 1934.2465
3 August Furniture 663.4717
4 August Office Supplies 2767.6007
5 August Technology 1887.0326
6 December Furniture 2011.8198
7 December Office Supplies 2064.3953
8 December Technology 4907.3548
9 February Furniture 120.6917
10 February Office Supplies 176.0910
11 February Technology 565.5257
12 January Furniture 805.4665
13 January Office Supplies 788.9506
14 January Technology 855.7736
15 July Furniture 374.2467
16 July Office Supplies -2482.0172
17 July Technology 1266.2879
18 June Furniture 596.3153
19 June Office Supplies 3031.4326
20 June Technology 1348.7765
21 March Furniture -1128.6550
22 March Office Supplies 1270.4133
23 March Technology 356.9716
24 May Furniture -349.8776
25 May Office Supplies 1747.2044
26 May Technology 1341.3828
27 November Furniture -297.8958
28 November Office Supplies 4411.9560
29 November Technology 5178.0667
30 October Furniture 249.1645
31 October Office Supplies 1117.6406
32 October Technology 2081.4522
33 September Furniture 1858.5976
34 September Office Supplies 6699.5402
35 September Technology -230.0384
In [115]:
#Year 2014
df = total_year_profit
fig = px.sunburst(df, path=['Profit',  'CategoryName'], values='Profit',
                  color='Profit', hover_data=['CategoryName'],
                  color_continuous_scale='RdBu',
                  color_continuous_midpoint=np.average(df['Profit'], weights=df['Profit']))
fig.show()
In [118]:
total_year_profit = df[df['Year_of_Order'] == 2015].groupby(['Month_of_Order', 'CategoryName'], as_index=False).Profit.sum()
total_year_profit.columns = ['Month_of_order','CategoryName' , 'Profit']
total_year_profit
Out[118]:
Month_of_order CategoryName Profit
0 April Furniture 338.6502
1 April Office Supplies 1477.3596
2 April Technology 2371.4864
3 August Furniture -487.4709
4 August Office Supplies 2208.7531
5 August Technology 3634.5262
6 December Furniture 428.8141
7 December Office Supplies 2631.9409
8 December Technology 4956.2109
9 February Furniture 394.8948
10 February Office Supplies 1814.1018
11 February Technology 604.8542
12 January Furniture -3014.2000
13 January Office Supplies 589.8969
14 January Technology -856.7039
15 July Furniture -325.0938
16 July Office Supplies 1113.7829
17 July Technology 2499.9592
18 June Furniture -412.6597
19 June Office Supplies 2195.3864
20 June Technology 1552.8305
21 March Furniture 1455.0573
22 March Office Supplies 5407.8482
23 March Technology 2869.1923
24 May Furniture 900.5847
25 May Office Supplies 1731.6139
26 May Technology 2035.6704
27 November Furniture 2992.0240
28 November Office Supplies 2800.0289
29 November Technology 6682.7355
30 October Furniture -605.5163
31 October Office Supplies 1090.9375
32 October Technology 2331.9448
33 September Furniture 1350.1185
34 September Office Supplies 2037.8837
35 September Technology 4821.1605
In [119]:
#Year 2015
df = total_year_profit
fig = px.sunburst(df, path=['Profit',  'CategoryName'], values='Profit',
                  color='Profit', hover_data=['CategoryName'],
                  color_continuous_scale='RdBu',
                  color_continuous_midpoint=np.average(df['Profit'], weights=df['Profit']))
fig.show()
In [122]:
total_year_profit = df[df['Year_of_Order'] == 2015].groupby(['Month_of_Order', 'CategoryName'], as_index=False).Profit.sum()
total_year_profit.columns = ['Month_of_order','CategoryName' , 'Profit']
total_year_profit
Out[122]:
Month_of_order CategoryName Profit
0 April Furniture 338.6502
1 April Office Supplies 1477.3596
2 April Technology 2371.4864
3 August Furniture -487.4709
4 August Office Supplies 2208.7531
5 August Technology 3634.5262
6 December Furniture 428.8141
7 December Office Supplies 2631.9409
8 December Technology 4956.2109
9 February Furniture 394.8948
10 February Office Supplies 1814.1018
11 February Technology 604.8542
12 January Furniture -3014.2000
13 January Office Supplies 589.8969
14 January Technology -856.7039
15 July Furniture -325.0938
16 July Office Supplies 1113.7829
17 July Technology 2499.9592
18 June Furniture -412.6597
19 June Office Supplies 2195.3864
20 June Technology 1552.8305
21 March Furniture 1455.0573
22 March Office Supplies 5407.8482
23 March Technology 2869.1923
24 May Furniture 900.5847
25 May Office Supplies 1731.6139
26 May Technology 2035.6704
27 November Furniture 2992.0240
28 November Office Supplies 2800.0289
29 November Technology 6682.7355
30 October Furniture -605.5163
31 October Office Supplies 1090.9375
32 October Technology 2331.9448
33 September Furniture 1350.1185
34 September Office Supplies 2037.8837
35 September Technology 4821.1605
In [123]:
#Year 2015
df = total_year_profit
fig = px.sunburst(df, path=['Profit',  'CategoryName'], values='Profit',
                  color='Profit', hover_data=['CategoryName'],
                  color_continuous_scale='RdBu',
                  color_continuous_midpoint=np.average(df['Profit'], weights=df['Profit']))
fig.show()
In [125]:
total_year_profit = df[df['Year_of_Order'] == 2016].groupby(['Month_of_Order', 'CategoryName'], as_index=False).Profit.sum()
total_year_profit.columns = ['Month_of_order','CategoryName' , 'Profit']
total_year_profit
Out[125]:
Month_of_order CategoryName Profit
0 April Furniture 364.8334
1 April Office Supplies -1100.4821
2 April Technology 3713.4636
3 August Furniture -494.1457
4 August Office Supplies 1629.1913
5 August Technology 927.0237
6 December Furniture 2828.6715
7 December Office Supplies 11466.6686
8 December Technology 3589.9692
9 February Furniture 377.0352
10 February Office Supplies 1330.0009
11 February Technology 3297.5434
12 January Furniture 303.9607
13 January Office Supplies 1604.4712
14 January Technology 916.3914
15 July Furniture 1038.9073
16 July Office Supplies 1123.9602
17 July Technology 2270.0104
18 June Furniture -106.2665
19 June Office Supplies 2467.4894
20 June Technology 2389.1552
21 March Furniture -555.2680
22 March Office Supplies 1061.4056
23 March Technology 3105.8304
24 May Furniture 1824.4739
25 May Office Supplies 2397.5024
26 May Technology 4440.1701
27 November Furniture 819.8088
28 November Office Supplies 4686.9250
29 November Technology -1495.3263
30 October Furniture -144.6600
31 October Office Supplies 4680.0550
32 October Technology 11707.7475
33 September Furniture 702.6025
34 September Office Supplies 3714.0417
35 September Technology 4912.0134
In [126]:
#Year 2016
df = total_year_profit
fig = px.sunburst(df, path=['Profit',  'CategoryName'], values='Profit',
                  color='Profit', hover_data=['CategoryName'],
                  color_continuous_scale='RdBu',
                  color_continuous_midpoint=np.average(df['Profit'], weights=df['Profit']))
fig.show()
In [128]:
total_year_profit = df[df['Year_of_Order'] == 2017].groupby(['Month_of_Order', 'CategoryName'], as_index=False).Profit.sum()
total_year_profit.columns = ['Month_of_order','CategoryName' , 'Profit']
total_year_profit
Out[128]:
Month_of_order CategoryName Profit
0 April Furniture 202.4624
1 April Office Supplies 3370.6015
2 April Technology -2639.7739
3 August Furniture 322.2390
4 August Office Supplies 6067.8582
5 August Technology 2650.8585
6 December Furniture 1146.7548
7 December Office Supplies 1774.3132
8 December Technology 5562.2788
9 February Furniture -199.0421
10 February Office Supplies 953.8253
11 February Technology 859.0888
12 January Furniture -39.4402
13 January Office Supplies 3260.4467
14 January Technology 3919.4326
15 July Furniture 324.6244
16 July Office Supplies 2563.2845
17 July Technology 4064.7123
18 June Furniture 904.9956
19 June Office Supplies 3378.9821
20 June Technology 3939.3580
21 March Furniture 1000.8532
22 March Office Supplies 2715.9624
23 March Technology 11035.0759
24 May Furniture -72.8829
25 May Office Supplies 3153.7260
26 May Technology 3261.7397
27 November Furniture 406.0637
28 November Office Supplies 3609.1029
29 November Technology 5674.9371
30 October Furniture -2526.9203
31 October Office Supplies 4040.7751
32 October Technology 7761.4207
33 September Furniture 1548.6837
34 September Office Supplies 4847.7438
35 September Technology 4595.1281
In [130]:
#Year 2017
df = total_year_profit
fig = px.sunburst(df, path=['Profit',  'CategoryName'], values='Profit',
                  color='Profit', hover_data=['CategoryName'],
                  color_continuous_scale='RdBu',
                  color_continuous_midpoint=np.average(df['Profit'], weights=df['Profit']))
fig.show()